---
title: "APSR Regressions with Post-Stratification Weights"
author: "Jake Jares"
date: "8/6/2021"
output: pdf_document
---

This program creates Figures 7-10 in "Further Supplemental Information." Note
that these figures correspond to Tables 3-6 (respectively) in the main text.

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, warning = FALSE)

library(tidyverse)
library(sandwich)
library(broom)
library(lmtest)
library(MASS)
library(dotwhisker)
library(reshape2)
library(haven)

# Syntax you're about to Google for the 897th time: rm(list=ls())
#rm(list=ls())

# Suppress summarise info
options(dplyr.summarise.inform = FALSE)
options(scipen = 999)
pformat = function(x){
    if(abs(x) >= 1000){
        return(format(round(as.numeric(x)), nsmall=0, big.mark=","))
    }
    else if (is.na(x)){
        return(x)
    }
    else {
        return(signif(x, 4))
    }
}

# CHANGE THE FOLLOWING FILEPATH TO REFLECT LOCATION OF REPLICATION FOLDER
PROJECT_DIR = "/Users/jjares/Documents/research/Farm Subsidies/replication_materials"
DATA = paste0(PROJECT_DIR, "/data")
GRAPHICS = paste0(PROJECT_DIR, "/graphics")

# NOTE: Must run "Create Post-Stratification Weights.do" first to create
# the dataset used in this program.
```

```{r read_in}
regression_data_fpath = paste0(DATA, "/Regression Data with Post-Strat Weights.dta")
keep_columns = c(
    "ExternalDataReference", "entropy_weight",  
    "crp_support", "arc_support", "mfp_support", "gov_avg", "trump_approve", 
    "conservative", "military", "gender", "age", "education", "total_acres", "farm_value_2019",
    "mfp_any", "mfp_total_cnt", "mfp_quintile",
    "conservative_mfp_any", "conservative_mfp_total_cnt", "conservative_mfp_quintile",
    "arc_quintile", "arctreat", "controltreat", "conservative_arc_quintile", "conservative_arctreat",
    "crp_quintile", "crptreat", "conservative_crp_quintile", "conservative_crptreat"
)
df = read_dta(regression_data_fpath, col_select=all_of(keep_columns))
```


```{r define_regression}
run_policy_feedback_reg = function(outcome, treatment, model_name, data=df, weighted=FALSE, subset=rep(TRUE, nrow(df))){
    # `outcome` is "crp_support", "arc_support", "mfp_support", "gov_avg", or "trump_approve"
    # `treatment` can be either a string (e.g. "arc_quintile") or a list 
    # (e.g. c("arc_quintile", "conservative_arc_quintile"))
    
    # Set up regression formula using specified `outcome` and `treatment`
    controls = c("conservative", "military", "gender", "age", "education", "total_acres", "farm_value_2019")
    RHS_variables = c(treatment, controls)
    formula = as.formula(
        paste(outcome, paste(RHS_variables, collapse=" + "), sep=" ~ "),
    )
    
    # Run regression (with or without entropy weights)
    if (weighted) {
        mod = lm(formula, data=data[subset,], weights=data[subset,]$entropy_weight)
    }else {
        mod = lm(formula, data=data[subset,])
    }
    
    # Tidy up model output
    output = tidy(coeftest(mod, vcov = vcovHC(mod, type="HC1")), conf.int=TRUE) %>% 
                  filter(term != "(Intercept)") %>% 
                  mutate(model = model_name, weighted=weighted)
    return(output)
}
```


```{r run_regressions}
# Table 3: MFP Unconditional Effects
tab3_col_1_weighted = run_policy_feedback_reg("mfp_support", "mfp_any", "Col 1")
tab3_col_1_unweighted = run_policy_feedback_reg("mfp_support", "mfp_any", "Col 1",  weighted=TRUE)

tab3_col_2_weighted = run_policy_feedback_reg("mfp_support", "mfp_total_cnt", "Col 2")
tab3_col_2_unweighted = run_policy_feedback_reg("mfp_support", "mfp_total_cnt", "Col 2", weighted=TRUE)

tab3_col_3_weighted = run_policy_feedback_reg("mfp_support", "mfp_quintile", "Col 3")
tab3_col_3_unweighted = run_policy_feedback_reg("mfp_support", "mfp_quintile", "Col 3",  weighted=TRUE)

tab3_col_4_weighted = run_policy_feedback_reg("gov_avg", "mfp_any", "Col 4")
tab3_col_4_unweighted = run_policy_feedback_reg("gov_avg", "mfp_any", "Col 4", weighted=TRUE)

tab3_model_list = list(
    tab3_col_1_weighted, tab3_col_1_unweighted, tab3_col_2_weighted, tab3_col_2_unweighted,
    tab3_col_3_weighted, tab3_col_3_unweighted, tab3_col_4_weighted, tab3_col_4_unweighted
)

# Table 4: MFP Heterogeneity by Ideology
tab4_col_1_weighted = run_policy_feedback_reg("mfp_support", c("mfp_any", "conservative_mfp_any"), "Col 1")
tab4_col_1_unweighted = run_policy_feedback_reg("mfp_support", c("mfp_any", "conservative_mfp_any"), "Col 1",  weighted=TRUE)

tab4_col_2_weighted = run_policy_feedback_reg("mfp_support", c("mfp_total_cnt", "conservative_mfp_total_cnt"),  "Col 2")
tab4_col_2_unweighted = run_policy_feedback_reg("mfp_support", c("mfp_total_cnt", "conservative_mfp_total_cnt"), "Col 2", weighted=TRUE)

tab4_col_3_weighted = run_policy_feedback_reg("mfp_support", c("mfp_quintile", "conservative_mfp_quintile"), "Col 3")
tab4_col_3_unweighted = run_policy_feedback_reg("mfp_support", c("mfp_quintile", "conservative_mfp_quintile"), "Col 3",  weighted=TRUE)

tab4_col_4_weighted = run_policy_feedback_reg("gov_avg", c("mfp_any", "conservative_mfp_any"),  "Col 4")
tab4_col_4_unweighted = run_policy_feedback_reg("gov_avg", c("mfp_any", "conservative_mfp_any"), "Col 4", weighted=TRUE)

tab4_model_list = list(
    tab4_col_1_weighted, tab4_col_1_unweighted, tab4_col_2_weighted, tab4_col_2_unweighted,
    tab4_col_3_weighted, tab4_col_3_unweighted, tab4_col_4_weighted, tab4_col_4_unweighted
)

# Table 5: ARC/PLC
tab5_col_1_weighted = run_policy_feedback_reg("arc_support", "arc_quintile", "Col 1")
tab5_col_1_unweighted = run_policy_feedback_reg("arc_support", "arc_quintile", "Col 1",  weighted=TRUE)

tab5_col_2_weighted = run_policy_feedback_reg("arc_support", c("arc_quintile","conservative_arc_quintile"), "Col 2")
tab5_col_2_unweighted = run_policy_feedback_reg("arc_support", c("arc_quintile","conservative_arc_quintile"), "Col 2",  weighted=TRUE)

tab5_col_3_weighted = run_policy_feedback_reg("arc_support", "arctreat", "Col 3", subset=(df$controltreat | df$arctreat))
tab5_col_3_unweighted = run_policy_feedback_reg("arc_support", "arctreat", "Col 3",  weighted=TRUE, subset=(df$controltreat | df$arctreat))

tab5_col_4_weighted = run_policy_feedback_reg("arc_support", c("arctreat","conservative_arctreat"), "Col 4", subset=(df$controltreat | df$arctreat))
tab5_col_4_unweighted = run_policy_feedback_reg("arc_support", c("arctreat","conservative_arctreat"), "Col 4",  weighted=TRUE, subset=(df$controltreat | df$arctreat))

tab5_col_5_weighted = run_policy_feedback_reg("trump_approve", "arctreat", "Col 5", subset=(df$controltreat | df$arctreat))
tab5_col_5_unweighted = run_policy_feedback_reg("trump_approve", "arctreat", "Col 5",  weighted=TRUE, subset=(df$controltreat | df$arctreat))

tab5_col_6_weighted = run_policy_feedback_reg("gov_avg", "arc_quintile", "Col 6")
tab5_col_6_unweighted = run_policy_feedback_reg("gov_avg", "arc_quintile", "Col 6",  weighted=TRUE)

tab5_col_7_weighted = run_policy_feedback_reg("gov_avg", c("arc_quintile","conservative_arc_quintile"), "Col 7")
tab5_col_7_unweighted = run_policy_feedback_reg("gov_avg", c("arc_quintile","conservative_arc_quintile"), "Col 7",  weighted=TRUE)


tab5_model_list = list(
    tab5_col_1_weighted, tab5_col_1_unweighted, tab5_col_2_weighted, tab5_col_2_unweighted,
    tab5_col_3_weighted, tab5_col_3_unweighted, tab5_col_4_weighted, tab5_col_4_unweighted,
    tab5_col_5_weighted, tab5_col_5_unweighted, tab5_col_6_weighted, tab5_col_6_unweighted,
    tab5_col_7_weighted, tab5_col_7_unweighted

)

# Table 6: CRP
tab6_col_1_weighted = run_policy_feedback_reg("crp_support", "crp_quintile", "Col 1")
tab6_col_1_unweighted = run_policy_feedback_reg("crp_support", "crp_quintile", "Col 1",  weighted=TRUE)

tab6_col_2_weighted = run_policy_feedback_reg("crp_support", c("crp_quintile","conservative_crp_quintile"), "Col 2")
tab6_col_2_unweighted = run_policy_feedback_reg("crp_support", c("crp_quintile","conservative_crp_quintile"), "Col 2",  weighted=TRUE)

tab6_col_3_weighted = run_policy_feedback_reg("crp_support", "crptreat", "Col 3", subset=(df$controltreat | df$crptreat))
tab6_col_3_unweighted = run_policy_feedback_reg("crp_support", "crptreat", "Col 3",  weighted=TRUE, subset=(df$controltreat | df$crptreat))

tab6_col_4_weighted = run_policy_feedback_reg("crp_support", c("crptreat","conservative_crptreat"), "Col 4", subset=(df$controltreat | df$crptreat))
tab6_col_4_unweighted = run_policy_feedback_reg("crp_support", c("crptreat","conservative_crptreat"), "Col 4",  weighted=TRUE, subset=(df$controltreat | df$crptreat))

tab6_col_5_weighted = run_policy_feedback_reg("trump_approve", "crptreat", "Col 5", subset=(df$controltreat | df$crptreat))
tab6_col_5_unweighted = run_policy_feedback_reg("trump_approve", "crptreat", "Col 5",  weighted=TRUE, subset=(df$controltreat | df$crptreat))

tab6_col_6_weighted = run_policy_feedback_reg("gov_avg", "crp_quintile", "Col 6")
tab6_col_6_unweighted = run_policy_feedback_reg("gov_avg", "crp_quintile", "Col 6",  weighted=TRUE)

tab6_col_7_weighted = run_policy_feedback_reg("gov_avg", c("crp_quintile","conservative_crp_quintile"), "Col 7")
tab6_col_7_unweighted = run_policy_feedback_reg("gov_avg", c("crp_quintile","conservative_crp_quintile"), "Col 7",  weighted=TRUE)


tab6_model_list = list(
    tab6_col_1_weighted, tab6_col_1_unweighted, tab6_col_2_weighted, tab6_col_2_unweighted,
    tab6_col_3_weighted, tab6_col_3_unweighted, tab6_col_4_weighted, tab6_col_4_unweighted,
    tab6_col_5_weighted, tab6_col_5_unweighted, tab6_col_6_weighted, tab6_col_6_unweighted,
    tab6_col_7_weighted, tab6_col_7_unweighted

)

```

```{r table_3}

tab3 = do.call("rbind", tab3_model_list) %>%
       mutate(
           x = case_when(
               model == "Col 1" & !weighted ~ 0.95,
               model == "Col 1" & weighted ~ 1.05,
               model == "Col 2" & !weighted ~ 1.45,
               model == "Col 2" & weighted ~ 1.55,
               model == "Col 3" & !weighted ~ 1.95,
               model == "Col 3" & weighted ~ 2.05,
               model == "Col 4" & !weighted ~ 2.45,
               model == "Col 4" & weighted ~ 2.55
        ),
        term = factor(term, levels = c("mfp_any", "mfp_total_cnt", "mfp_quintile",
                                 "conservative", "military", "gender", 
                                 "age", "education", "total_acres", "farm_value_2019") )
        )

term.label = c("MFP (Binary)",
               "MFP (Number of Years)",
               "MFP (Quintile)",
               "Conservative",
               "Veteran",
               "Gender",
               "Age",
               "Education",
               "Farm Size (Tens of Thousands of Acres)",
               "Farm Value (Tens of Millions of 2019 dollars)")
names(term.label) = c("mfp_any", "mfp_total_cnt", "mfp_quintile",
               "conservative", "military", "gender", 
               "age", "education", "total_acres", "farm_value_2019")

plot = ggplot(data = tab3, mapping = aes(x = x, y = estimate, color = weighted)) + 
    geom_hline(yintercept = 0, linetype = "dashed", color = "gray", alpha = 0.6) +
    geom_point() +
    geom_linerange(mapping = aes(x = x, ymin = conf.low, ymax = conf.high)) + 
    facet_wrap(~ term, 
               nrow = 5,
               labeller = labeller(term=term.label),
               scales = "free_y") + 
    theme_classic() +
    scale_x_continuous(
        breaks = c(1, 1.5, 2, 2.5),
        labels = c("Model 1", "Model 2", "Model 3", "Model 4") 
    ) +
    theme(
        strip.background = element_rect(fill = "white"),
        legend.position = "top",
        plot.title = element_text(hjust = 0.5)
    ) +
    labs(x = "", y = "") + 
    scale_color_discrete(name = "",
                         breaks = c(FALSE, TRUE),
                         labels = c("Unweighted", "Weighted"))

ggsave(paste0(GRAPHICS, "/Further Supplemental Information Figure 7.png"),
       plot=plot,
       width = 7, height = 7, units = "in")

```

```{r table_4}
tab4_regressors = c(
    "mfp_any", "conservative_mfp_any", "mfp_total_cnt", "conservative_mfp_total_cnt", "mfp_quintile", "conservative_mfp_quintile",
    "conservative", "military", "gender", "age", "education", "total_acres", "farm_value_2019"
)
tab4 = do.call("rbind", tab4_model_list) %>%
       mutate(
           x = case_when(
               model == "Col 1" & !weighted ~ 0.95,
               model == "Col 1" & weighted ~ 1.05,
               model == "Col 2" & !weighted ~ 1.45,
               model == "Col 2" & weighted ~ 1.55,
               model == "Col 3" & !weighted ~ 1.95,
               model == "Col 3" & weighted ~ 2.05,
               model == "Col 4" & !weighted ~ 2.45,
               model == "Col 4" & weighted ~ 2.55
        ),
        term = factor(term, levels=tab4_regressors))

term.label=c(
    "MFP (Binary)", "MFP (Binary) x Conservative", "MFP (Number of Years)", "MFP (Number of Years) x Conservative",
    "MFP (Quintile)", "MFP (Quintile) x Conservative",  "Conservative", "Veteran", "Gender", "Age", 
    "Education", "Farm Size (Tens of Thousands of Acres)", "Farm Value (Tens of Millions of 2019 dollars)"
)
names(term.label) = tab4_regressors

plot = ggplot(data = tab4, mapping = aes(x = x, y = estimate, color = weighted)) + 
    geom_hline(yintercept = 0, linetype = "dashed", color = "gray", alpha = 0.6) +
    geom_point() +
    geom_linerange(mapping = aes(x = x, ymin = conf.low, ymax = conf.high)) + 
    facet_wrap(~ term, 
               nrow = 7,
               labeller = labeller(term=term.label),
               scales = "free_y") + 
    theme_classic() +
    scale_x_continuous(
        breaks = c(1, 1.5, 2, 2.5),
        labels = c("Model 1", "Model 2", "Model 3", "Model 4") 
    ) +
    theme(
        strip.background = element_rect(fill = "white"),
        legend.position = "top",
        plot.title = element_text(hjust = 0.5)
    ) +
    labs(x = "", y = "") + 
    scale_color_discrete(name = "",
                         breaks = c(FALSE, TRUE),
                         labels = c("Unweighted", "Weighted"))

ggsave(paste0(GRAPHICS, "/Further Supplemental Information Figure 8.png"),
       plot=plot,
       width = 7, height = 9.5, units = "in")

```

```{r table_5}
tab5_regressors = c(
    "arc_quintile", "conservative_arc_quintile", "arctreat","conservative_arctreat",
    "conservative", "military", "gender", "age", "education", "total_acres", "farm_value_2019"
)
tab5 = do.call("rbind", tab5_model_list) %>%
       mutate(
           x = case_when(
               model == "Col 1" & !weighted ~ 0.95,
               model == "Col 1" & weighted ~ 1.05,
               model == "Col 2" & !weighted ~ 1.45,
               model == "Col 2" & weighted ~ 1.55,
               model == "Col 3" & !weighted ~ 1.95,
               model == "Col 3" & weighted ~ 2.05,
               model == "Col 4" & !weighted ~ 2.45,
               model == "Col 4" & weighted ~ 2.55,
               model == "Col 5" & !weighted ~ 2.95,
               model == "Col 5" & weighted ~ 3.05,
               model == "Col 6" & !weighted ~ 3.45,
               model == "Col 6" & weighted ~ 3.55,
               model == "Col 7" & !weighted ~ 3.95,
               model == "Col 7" & weighted ~ 4.05,
        ),
        term = factor(term, levels=tab5_regressors))

term.label=c(
    "ARC/PLC  Quintile", "ARC/PLC  Quintile x Conservative", "ARC/PLC Experimental Treatment", "ARC/PLC Treatment x Conservative",
    "Conservative", "Veteran", "Gender", "Age", 
    "Education", "Farm Size (Tens of Thousands of Acres)", "Farm Value (Tens of Millions of 2019 dollars)"
)
names(term.label) = tab5_regressors

plot = ggplot(data = tab5, mapping = aes(x = x, y = estimate, color = weighted)) + 
    geom_hline(yintercept = 0, linetype = "dashed", color = "gray", alpha = 0.6) +
    geom_point() +
    geom_linerange(mapping = aes(x = x, ymin = conf.low, ymax = conf.high)) + 
    facet_wrap(~ term, 
               nrow = 6,
               labeller = labeller(term=term.label),
               scales = "free_y") + 
    theme_classic() +
    scale_x_continuous(
        breaks = c(1, 1.5, 2, 2.5, 3, 3.5, 4),
        labels = c("Mod 1", "Mod 2", "Mod 3", "Mod 4", "Mod 5", "Mod 6", "Mod 7") 
    ) +
    theme(
        strip.background = element_rect(fill = "white"),
        legend.position = "top",
        plot.title = element_text(hjust = 0.5)
    ) +
    labs(x = "", y = "") + 
    # +
    scale_color_discrete(name = "",
                         breaks = c(FALSE, TRUE),
                         labels = c("Unweighted", "Weighted"))

ggsave(paste0(GRAPHICS, "/Further Supplemental Information Figure 9.png"),
       plot=plot,
       width = 7, height = 9, units = "in")

```


```{r table_6}
tab6_regressors = c(
    "crp_quintile", "conservative_crp_quintile", "crptreat","conservative_crptreat",
    "conservative", "military", "gender", "age", "education", "total_acres", "farm_value_2019"
)
tab6 = do.call("rbind", tab6_model_list) %>%
       mutate(
           x = case_when(
               model == "Col 1" & !weighted ~ 0.95,
               model == "Col 1" & weighted ~ 1.05,
               model == "Col 2" & !weighted ~ 1.45,
               model == "Col 2" & weighted ~ 1.55,
               model == "Col 3" & !weighted ~ 1.95,
               model == "Col 3" & weighted ~ 2.05,
               model == "Col 4" & !weighted ~ 2.45,
               model == "Col 4" & weighted ~ 2.55,
               model == "Col 5" & !weighted ~ 2.95,
               model == "Col 5" & weighted ~ 3.05,
               model == "Col 6" & !weighted ~ 3.45,
               model == "Col 6" & weighted ~ 3.55,
               model == "Col 7" & !weighted ~ 3.95,
               model == "Col 7" & weighted ~ 4.05,
        ),
        term = factor(term, levels=tab6_regressors))

term.label=c(
    "CRP Quintile", "CRP Quintile x Conservative", "CRP Experimental Treatment", "CRP Treatment x Conservative",
    "Conservative", "Veteran", "Gender", "Age", 
    "Education", "Farm Size (Tens of Thousands of Acres)", "Farm Value (Tens of Millions of 2019 dollars)"
)
names(term.label) = tab6_regressors

plot = ggplot(data = tab6, mapping = aes(x = x, y = estimate, color = weighted)) + 
    geom_hline(yintercept = 0, linetype = "dashed", color = "gray", alpha = 0.6) +
    geom_point() +
    geom_linerange(mapping = aes(x = x, ymin = conf.low, ymax = conf.high)) + 
    facet_wrap(~ term, 
               nrow = 6,
               labeller = labeller(term=term.label),
               scales = "free_y") + 
    theme_classic() +
    scale_x_continuous(
        breaks = c(1, 1.5, 2, 2.5, 3, 3.5, 4),
        labels = c("Mod 1", "Mod 2", "Mod 3", "Mod 4", "Mod 5", "Mod 6", "Mod 7") 
    ) +
    theme(
        strip.background = element_rect(fill = "white"),
        legend.position = "top",
        plot.title = element_text(hjust = 0.5)
    ) +
    labs(x = "", y = "") + 
    scale_color_discrete(name = "",
                         breaks = c(FALSE, TRUE),
                         labels = c("Unweighted", "Weighted"))
ggsave(paste0(GRAPHICS, "/Further Supplemental Information Figure 10.png"),
       plot=plot,
       width = 7, height = 9, units = "in")

```

